* Steps: 
*** 1) MERGE VITAL STATISTICS WITH CENSUS 
*** 2) MERGE WITH POPULATION FORECAST 
*** 3) MERGE WITH RENAMU - FOR APPENDIX
*** 4) MERGE WITH GEO DATA
*** 5) MERGE WITH SIAF EXPENDITURE DATA - FOR APPENDIX
*** 6) MERGE WITH POLITICAL DATA - FOR APPENDIX

*------------------------------------------------------------------------------
*1) MERGE VITAL STATISTICS WITH CENSUS 
*------------------------------------------------------------------------------		

* Load the vital statistics file and complete blank jurisdiction names
use "$data/vitalstats", clear

* Helper keys: first 2 and first 4 characters of ubigeo
tempvar dep_key prov_key
generate `dep_key'  = substr(ubigeo, 1, 2)
generate `prov_key' = substr(ubigeo, 1, 4)

* Within each key, blank names sort first, so the last row of the group
* carries a non-blank name whenever the group has one; copy it into the blanks
local keys  "`dep_key' `prov_key' ubigeo"
local names "departamento provincia distrito"
forvalues i = 1/3 {
	local key  : word `i' of `keys'
	local name : word `i' of `names'
	bysort `key' (`name'): replace `name' = `name'[_N] if `name' == ""
}

* Rows whose ubigeo starts with 1608 always get the province name MAYNAS
replace provincia = "MAYNAS" if `prov_key' == "1608"

* Helper keys must not be written to disk
drop `dep_key' `prov_key'

save "$data/district_setup.dta", replace


*CENSUS DATA: 2005-2007-2017
**********************************
* Attach the census variables by district-year; census-only rows are discarded
use "$data/district_setup.dta", clear
sort ubigeo year
merge 1:1 ubigeo year using "$data/censo.dta", keep(master match) nogenerate

* Bring the piped-water share in line with the dis_ prefix of the other shares
rename hh_piped dis_piped

label variable dis_piped       "Share HH piped water"
label variable dis_hh_onsite   "Share HH latrine"
label variable dis_hh_od       "Share HH open defecation"
label variable dis_hh_sewer    "Share HH sewerage"
label variable dis_elecp       "Share HH electricity"
label variable dis_educ_secp   "Share HH head secondary"
label variable dis_educ_secomp "Share HH head completed secondary"

save "$data/district_setup.dta", replace

*CENSUS DATA: 2005 baseline
**********************************
* Attach the district-level 2005 baseline to every year of each district;
* districts that appear only in the baseline file are discarded
use "$data/district_setup.dta", clear
sort ubigeo
merge m:1 ubigeo using "$data/censo05.dta", keep(master match) nogenerate
save "$data/district_setup.dta", replace


*CENSUS DATA: 2005 baseline - u1y and u5y pop
************************************************
* Build the 2005 infant / under-five population file
use "$data/censo05_popu1y5y.dta", clear

* Tag the counts with the 05 suffix and derive the infant share of under-fives
rename (pop_u1y pop_u5y) (pop_u1y05 pop_u5y05)
generate pop_u1y5y05 = pop_u1y05 / pop_u5y05

label variable pop_u1y05   "Infant population (census 2005)"
label variable pop_u5y05   "Under-five population (census 2005)"
label variable pop_u1y5y05 "Share infant from under-five population (census 2005)"

tempfile pop05_shares
save `pop05_shares'

* Attach to every year of each district; districts found only in the
* population file are discarded
use "$data/district_setup.dta", clear
sort ubigeo
merge m:1 ubigeo using `pop05_shares', keep(master match) nogenerate

save "$data/district_setup.dta", replace


*------------------------------------------------------------------------------
*2) MERGE WITH POPULATION FORECAST 
*------------------------------------------------------------------------------		

* Attach the population forecasts by district-year.
* No pre-sort is needed: merge sorts on the key variables itself.
* NOTE(review): unlike the other merges in this file, rows found only in the
* forecast file are kept here - confirm this is intended.
use "$data/district_setup.dta", clear
merge 1:1 ubigeo year using "$data/ineipop.dta", nogenerate
label variable inei_pop "Population (forecasts)"
save "$data/district_setup.dta", replace


*------------------------------------------------------------------------------
*3) MERGE WITH RENAMU
*------------------------------------------------------------------------------	

* Prepare the RENAMU extract (years up to 2015) as a temporary file
use "$data/renamu.dta", clear
keep if year<=2015
sort ubigeo year
lab var renamu_int "Internet connectivity"
lab var renamu_atform "Technical support"
lab var renamu_pia "Municipal revenue (millions)"
lab var renamu_lnpia "Municipal revenue (log)"
tempfile renamu
save `renamu'

* Attach RENAMU by district-year; RENAMU-only rows are discarded
use "$data/district_setup.dta", clear
sort ubigeo year
merge 1:1 ubigeo year using `renamu', gen(m_renamu)
drop if m_renamu==2
drop m_renamu

* Impute missing RENAMU values with the district mean.
* The mean is computed by district directly, so the code no longer depends on
* a hard-coded number of districts and covers every district in the data.
* As before, only rows with a non-missing renamu_lnpia and a non-blank ubigeo
* take part: they define the rows the mean is taken over and the rows that can
* be filled. A district whose values are all missing is left untouched.
* NOTE(review): because of that row restriction renamu_lnpia itself is never
* imputed - confirm whether the restriction is intended.
tempvar in_scope dist_mean
generate byte `in_scope' = (renamu_lnpia != .) & (ubigeo != "")

foreach var of varlist renamu_dhealth renamu_atform renamu_int renamu_lnpia {
	quietly bysort ubigeo: egen double `dist_mean' = mean(`var') if `in_scope'
	quietly replace `var' = `dist_mean' if `var' == . & `in_scope' & `dist_mean' != .
	drop `dist_mean'
}

drop `in_scope'

* Restore the district-year ordering disturbed by the by-group computation
sort ubigeo year

save "$data/district_setup", replace



*------------------------------------------------------------------------------
*4) MERGE WITH GEO DATA
*------------------------------------------------------------------------------	

* Works on the data left in memory by the previous section (no reload here).
* Attach district-level geography to every year; geography-only districts are
* discarded. The merge indicator m_geo stays in the data.
sort ubigeo
merge m:1 ubigeo using "$data/geo.dta", generate(m_geo) keep(master match)

* Rescale area by a factor of one million (square metres to square kilometres)
replace geo_area = geo_area / 1e6

* Population density: forecast population per unit of area
generate geo_pop_dens = inei_pop / geo_area

* 2005 population density, copied to every year of the district
bysort ubigeo: egen geo_pop_dens05 = max(cond(year == 2005, geo_pop_dens, .))

* Variable labels (LaTeX markup is kept verbatim for the output tables)
label variable geo_pop_dens05       "Population density ($\frac{pop}{km^2}$)"
label variable geo_pop_dens         "Population density ($\frac{pop}{km^2}$)"
label variable river_leng           "River density ($\frac{km}{km^2}$)"
label variable geo_slope1_perc      "Share district gradient below 0.8\%"
label variable geo_slope2_perc      "Share district gradient \textbraceleft{0.8-4.19]}\%"
label variable geo_slope3_perc      "Share district gradient \textbraceleft{4.19-13]}\%"
label variable geo_slope4_perc      "Share district gradient above 13\%"
label variable geo_elev_250perc     "Share district elevation below 250 mamls"
label variable geo_elev_251500perc  "Share district elevation \textbraceleft{250-500]} mamls"
label variable geo_elev_5011000perc "Share district elevation \textbraceleft{500-1000]} mamls"
label variable geo_elev_1001perc    "Share district elevation above 1000 mamls"
label variable geo_area             "District area (sq. km)"

save "$data/district_setup.dta", replace


*------------------------------------------------------------------------------
*5) MERGE WITH SIAF EXPENDITURE DATA 
*------------------------------------------------------------------------------	

* Attach SIAF expenditure by district-year; SIAF-only rows are discarded
use "$data/district_setup", clear
sort ubigeo
merge m:1 ubigeo year using "$data/siaf_expenditure", keep(master match) nogenerate

* Drop the pia and pim variables, which are not needed for the Appendix
drop pia* pim*

* Give the three executed-expenditure series descriptive names
rename (ejecucion1 ejecucion2 ejecucion3) (siaf_ejc_trans siaf_ejc_energ siaf_ejc_salud)

* Impute missing SIAF values: a missing district-year becomes 0 when the
* district has at least one non-missing value of that series; districts with
* no value at all stay missing
foreach cat in trans energ salud {
	bysort ubigeo: egen max_ejc_`cat' = max(siaf_ejc_`cat')
	replace siaf_ejc_`cat' = 0 if siaf_ejc_`cat' == . & max_ejc_`cat' != .
	drop max_ejc_`cat'
}

* Restrict the panel to 2005-2015
keep if year >= 2005 & year <= 2015

save "$data/district_setup", replace


*------------------------------------------------------------------------------
*6) MERGE WITH POLITICAL DATA
*------------------------------------------------------------------------------	

* Prepare the electoral indicators for merging on jurisdiction names
use "$data/jne_indic", clear

* Upper-case ASCII versions of the three jurisdiction names
* (NFD-normalised, then converted to ASCII and upper-cased)
foreach lvl in departamento provincia distrito {
	local src = upper("`lvl'")
	quietly generate `lvl' = upper(ustrto(ustrnormalize(`src', "nfd"), "ascii", 2))
}

* Concatenated name and a numeric id for each distinct concatenated name
egen concpol = concat(departamento provincia distrito)
sort concpol mandate_year
egen npol = group(concpol)

*Turnover: reelection with the values 0 and 1 swapped
recode reelection (0=1) (1=0), gen(turnover)

keep departamento provincia distrito concpol npol mandate_year ///
	reelection turnover pct_reelection_1993

tempfile political
save `political'


* Merge the electoral indicators onto the district panel: first an exact match
* on jurisdiction names and mandate year, then a fuzzy match (matchit) for the
* districts that the exact match left without electoral data.
* Relies on the political tempfile saved earlier in this section.
use "$data/district_setup", clear

* New departamento, provincia, distrito variables without special characters
replace departamento = upper(ustrto(ustrnormalize(departamento, "nfd"), "ascii", 2)) 
replace departamento = "CALLAO" if departamento == "P. C. DEL CALLAO"
replace provincia = upper(ustrto(ustrnormalize(provincia, "nfd"), "ascii", 2)) 
replace provincia = "ANTONIO RAIMONDI" if provincia=="ANTONIO RAYMONDI"
replace distrito = upper(ustrto(ustrnormalize(distrito, "nfd"), "ascii", 2)) 

* Merge w/ electoral data -------------
*--------------------------------------
* Map each calendar year to a mandate year
cap drop mandate_year
gen mandate_year=.
replace mandate_year=2006 if year<=2006
replace mandate_year=2010 if year>2006 & year<=2010
replace mandate_year=2014 if year>2010 & year<=2014
replace mandate_year=2018 if year>2014 & year<=2018
lab var mandate_year "Mandate year"

* Concatenated name and numeric id on the panel side (used by the fuzzy match)
egen concjmp=concat(departamento provincia distrito)
egen njmp=group(concjmp)

merge m:1 departamento provincia distrito mandate_year using `political', gen(merge_pol)

tempfile matched
save `matched'


* GET MISSING UBIGEOS WITH MATCHIT
*--------------------------------------
*POLITICAL
*------------
* Electoral names with no exact match, one row per npol.
* Saved under its own tempfile name so that the full electoral file in the
* political tempfile stays available for the final merge below.
preserve
keep if merge_pol==2
collapse (first) departamento provincia distrito concpol, by(npol)
tempfile pol_unmatched
save `pol_unmatched'
restore

*DATA
*------------
* Panel names with no exact match, one row per njmp
preserve
keep if merge_pol==1
collapse (first) departamento provincia distrito concjmp, by(njmp)
tempfile jmp
save `jmp'
restore

*MATCHIT
*----------
* Keep, for each panel name, the best-scoring electoral name with a
* similarity of at least 0.85; PAJARILLO is excluded by hand
use `jmp', clear
matchit njmp concjmp using `pol_unmatched', idusing(npol) txtusing(concpol)
bysort njmp: egen similmax=max(similscore)
keep if similscore==similmax
keep if similscore>=0.85
drop if regexm(concjmp, "PAJARILLO")
drop simil*
sort njmp
tempfile matchit
save `matchit'

*ELECTORAL OUTCOMES KEYED BY NPOL AND MANDATE YEAR
*--------------------------------------------------
* Outcomes are stored under an fz_ prefix so the merge below cannot clash
* with the (missing) outcome columns already present in the panel
use `political', clear
keep npol mandate_year reelection turnover pct_reelection_1993
drop if missing(npol, mandate_year)
rename (reelection turnover pct_reelection_1993) ///
	(fz_reelection fz_turnover fz_pct_reelection_1993)
tempfile pol_outcomes
save `pol_outcomes'


*MERGE BACK MATCHIT
*------------------
use `matched', clear
drop npol concpol
drop if merge_pol==2

* Fuzzy link: panel name id to electoral name id
merge m:1 njmp using `matchit', keep(1 3) nogen

* Electoral outcomes for the linked name and the mandate year of the row.
* Previously this step merged the names-only file, which added no new
* variables, so fuzzy-matched districts never received their outcomes.
merge m:1 npol mandate_year using `pol_outcomes', keep(1 3) nogen

* Fill only rows that had no exact match; exact matches are left untouched
foreach v in reelection turnover pct_reelection_1993 {
	replace `v' = fz_`v' if merge_pol==1 & !missing(fz_`v')
	drop fz_`v'
}

drop merge_pol npol concpol
sort ubigeo year
drop concjmp njmp

lab var pct_reelection_1993 "Municipal re-election rate (Mayor) since 1993"
lab var turnover "Political turnover (Mayor)"

save "$data/district_setup", replace


